pip install pandas
Requirement already satisfied: pandas in d:\anaconda\lib\site-packages (2.0.3) Requirement already satisfied: python-dateutil>=2.8.2 in d:\anaconda\lib\site-packages (from pandas) (2.8.2) Requirement already satisfied: pytz>=2020.1 in d:\anaconda\lib\site-packages (from pandas) (2023.3.post1) Requirement already satisfied: tzdata>=2022.1 in d:\anaconda\lib\site-packages (from pandas) (2023.3) Requirement already satisfied: numpy>=1.21.0 in d:\anaconda\lib\site-packages (from pandas) (1.24.3) Requirement already satisfied: six>=1.5 in d:\anaconda\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0) Note: you may need to restart the kernel to use updated packages.
pip install numpy
Requirement already satisfied: numpy in d:\anaconda\lib\site-packages (1.24.3) Note: you may need to restart the kernel to use updated packages.
pip install plotly
Requirement already satisfied: plotly in d:\anaconda\lib\site-packages (5.9.0) Requirement already satisfied: tenacity>=6.2.0 in d:\anaconda\lib\site-packages (from plotly) (8.2.2) Note: you may need to restart the kernel to use updated packages.
# 'sklearn' on PyPI is a deprecated placeholder that refuses to install;
# the library that is imported as `sklearn` is published as 'scikit-learn'.
pip install scikit-learn
Collecting sklearnNote: you may need to restart the kernel to use updated packages.
error: subprocess-exited-with-error
python setup.py egg_info did not run successfully.
exit code: 1
[15 lines of output]
The 'sklearn' PyPI package is deprecated, use 'scikit-learn'
rather than 'sklearn' for pip commands.
Here is how to fix this error in the main use cases:
- use 'pip install scikit-learn' rather than 'pip install sklearn'
- replace 'sklearn' by 'scikit-learn' in your pip requirements files
(requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)
- if the 'sklearn' package is used by one of your dependencies,
it would be great if you take some time to track which package uses
'sklearn' instead of 'scikit-learn' and report it to their issue tracker
- as a last resort, set the environment variable
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True to avoid this error
More information is available at
https://github.com/scikit-learn/sklearn-pypi-package
[end of output]
note: This error originates from a subprocess, and is likely not a problem with pip.
error: metadata-generation-failed
Encountered error while generating package metadata.
See above for output.
note: This is an issue with the package mentioned above, not pip.
hint: See above for details.
Downloading sklearn-0.0.post12.tar.gz (2.6 kB) Preparing metadata (setup.py): started Preparing metadata (setup.py): finished with status 'error'
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# Read the BRCA patient records from disk into a DataFrame and preview the
# first rows.
csv_path = 'D:/MY DESTINATION/PROJECTS/Projects/Cancer_breast_survive_prediction/BRCA.csv'
df = pd.read_csv(csv_path)
df.head()
| Patient_ID | Age | Gender | Protein1 | Protein2 | Protein3 | Protein4 | Tumour_Stage | Histology | ER status | PR status | HER2 status | Surgery_type | Date_of_Surgery | Date_of_Last_Visit | Patient_Status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | TCGA-D8-A1XD | 36.0 | FEMALE | 0.080353 | 0.42638 | 0.54715 | 0.273680 | III | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Modified Radical Mastectomy | 15-Jan-17 | 19-Jun-17 | Alive |
| 1 | TCGA-EW-A1OX | 43.0 | FEMALE | -0.420320 | 0.57807 | 0.61447 | -0.031505 | II | Mucinous Carcinoma | Positive | Positive | Negative | Lumpectomy | 26-Apr-17 | 09-Nov-18 | Dead |
| 2 | TCGA-A8-A079 | 69.0 | FEMALE | 0.213980 | 1.31140 | -0.32747 | -0.234260 | III | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Other | 08-Sep-17 | 09-Jun-18 | Alive |
| 3 | TCGA-D8-A1XR | 56.0 | FEMALE | 0.345090 | -0.21147 | -0.19304 | 0.124270 | II | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Modified Radical Mastectomy | 25-Jan-17 | 12-Jul-17 | Alive |
| 4 | TCGA-BH-A0BF | 56.0 | FEMALE | 0.221550 | 1.90680 | 0.52045 | -0.311990 | II | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Other | 06-May-17 | 27-Jun-19 | Dead |
# Number of missing values in each column.
df.isna().sum()
Patient_ID 7 Age 7 Gender 7 Protein1 7 Protein2 7 Protein3 7 Protein4 7 Tumour_Stage 7 Histology 7 ER status 7 PR status 7 HER2 status 7 Surgery_type 7 Date_of_Surgery 7 Date_of_Last_Visit 24 Patient_Status 20 dtype: int64
# Dimensions of the frame as (rows, columns).
df.shape
(341, 16)
# Discard every row that has at least one missing value, then re-count the
# missing values per column to confirm none remain.
df = df.dropna(axis=0, how="any")
df.isna().sum()
Patient_ID 0 Age 0 Gender 0 Protein1 0 Protein2 0 Protein3 0 Protein4 0 Tumour_Stage 0 Histology 0 ER status 0 PR status 0 HER2 status 0 Surgery_type 0 Date_of_Surgery 0 Date_of_Last_Visit 0 Patient_Status 0 dtype: int64
# Summary of the frame: index range, column dtypes, non-null counts and
# memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> Index: 317 entries, 0 to 333 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Patient_ID 317 non-null object 1 Age 317 non-null float64 2 Gender 317 non-null object 3 Protein1 317 non-null float64 4 Protein2 317 non-null float64 5 Protein3 317 non-null float64 6 Protein4 317 non-null float64 7 Tumour_Stage 317 non-null object 8 Histology 317 non-null object 9 ER status 317 non-null object 10 PR status 317 non-null object 11 HER2 status 317 non-null object 12 Surgery_type 317 non-null object 13 Date_of_Surgery 317 non-null object 14 Date_of_Last_Visit 317 non-null object 15 Patient_Status 317 non-null object dtypes: float64(5), object(11) memory usage: 42.1+ KB
# Count how many patients are recorded under each gender.
gender_counts = df["Gender"].value_counts()
print(gender_counts)
Gender FEMALE 313 MALE 4 Name: count, dtype: int64
# Tally the patients by tumour stage.
stage = df.Tumour_Stage.value_counts()
print(stage)
Tumour_Stage II 180 III 77 I 60 Name: count, dtype: int64
# Donut chart of the tumour-stage distribution: one slice per stage, sized
# by the number of patients in that stage.
stage = df["Tumour_Stage"].value_counts()
transactions, quantity = stage.index, stage.values
figure = px.pie(
    df,
    names=transactions,
    values=quantity,
    title="Tumour Stages of Patients",
    hole=0.5,
)
figure.show()
# Donut chart of the histology distribution: one slice per histology label,
# sized by the number of patients with that label.
histology = df["Histology"].value_counts()
transactions, quantity = histology.index, histology.values
figure = px.pie(
    df,
    names=transactions,
    values=quantity,
    title="Histology of Patients",
    hole=0.5,
)
figure.show()
# Print the value distribution of the ER, PR and HER2 status columns,
# in that order.
for status_column in ("ER status", "PR status", "HER2 status"):
    print(df[status_column].value_counts())
ER status Positive 317 Name: count, dtype: int64 PR status Positive 317 Name: count, dtype: int64 HER2 status Negative 288 Positive 29 Name: count, dtype: int64
# Donut chart of how many patients underwent each type of surgery.
surgery = df["Surgery_type"].value_counts()
transactions, quantity = surgery.index, surgery.values
figure = px.pie(
    df,
    names=transactions,
    values=quantity,
    title="Type of Surgery of Patients",
    hole=0.5,
)
figure.show()
# Preview the frame again before the categorical columns are encoded.
df.head()
| Patient_ID | Age | Gender | Protein1 | Protein2 | Protein3 | Protein4 | Tumour_Stage | Histology | ER status | PR status | HER2 status | Surgery_type | Date_of_Surgery | Date_of_Last_Visit | Patient_Status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | TCGA-D8-A1XD | 36.0 | FEMALE | 0.080353 | 0.42638 | 0.54715 | 0.273680 | III | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Modified Radical Mastectomy | 15-Jan-17 | 19-Jun-17 | Alive |
| 1 | TCGA-EW-A1OX | 43.0 | FEMALE | -0.420320 | 0.57807 | 0.61447 | -0.031505 | II | Mucinous Carcinoma | Positive | Positive | Negative | Lumpectomy | 26-Apr-17 | 09-Nov-18 | Dead |
| 2 | TCGA-A8-A079 | 69.0 | FEMALE | 0.213980 | 1.31140 | -0.32747 | -0.234260 | III | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Other | 08-Sep-17 | 09-Jun-18 | Alive |
| 3 | TCGA-D8-A1XR | 56.0 | FEMALE | 0.345090 | -0.21147 | -0.19304 | 0.124270 | II | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Modified Radical Mastectomy | 25-Jan-17 | 12-Jul-17 | Alive |
| 4 | TCGA-BH-A0BF | 56.0 | FEMALE | 0.221550 | 1.90680 | 0.52045 | -0.311990 | II | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Other | 06-May-17 | 27-Jun-19 | Dead |
# Encode the Roman-numeral tumour stage as an integer code.
stage_codes = {"I": 1, "II": 2, "III": 3}
df["Tumour_Stage"] = df["Tumour_Stage"].map(stage_codes)
df.head()
| Patient_ID | Age | Gender | Protein1 | Protein2 | Protein3 | Protein4 | Tumour_Stage | Histology | ER status | PR status | HER2 status | Surgery_type | Date_of_Surgery | Date_of_Last_Visit | Patient_Status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | TCGA-D8-A1XD | 36.0 | FEMALE | 0.080353 | 0.42638 | 0.54715 | 0.273680 | 3 | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Modified Radical Mastectomy | 15-Jan-17 | 19-Jun-17 | Alive |
| 1 | TCGA-EW-A1OX | 43.0 | FEMALE | -0.420320 | 0.57807 | 0.61447 | -0.031505 | 2 | Mucinous Carcinoma | Positive | Positive | Negative | Lumpectomy | 26-Apr-17 | 09-Nov-18 | Dead |
| 2 | TCGA-A8-A079 | 69.0 | FEMALE | 0.213980 | 1.31140 | -0.32747 | -0.234260 | 3 | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Other | 08-Sep-17 | 09-Jun-18 | Alive |
| 3 | TCGA-D8-A1XR | 56.0 | FEMALE | 0.345090 | -0.21147 | -0.19304 | 0.124270 | 2 | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Modified Radical Mastectomy | 25-Jan-17 | 12-Jul-17 | Alive |
| 4 | TCGA-BH-A0BF | 56.0 | FEMALE | 0.221550 | 1.90680 | 0.52045 | -0.311990 | 2 | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Other | 06-May-17 | 27-Jun-19 | Dead |
# Encode each histology label as an integer code.
histology_codes = {
    "Infiltrating Ductal Carcinoma": 1,
    "Infiltrating Lobular Carcinoma": 2,
    "Mucinous Carcinoma": 3,
}
df["Histology"] = df["Histology"].map(histology_codes)
df.head()
| Patient_ID | Age | Gender | Protein1 | Protein2 | Protein3 | Protein4 | Tumour_Stage | Histology | ER status | PR status | HER2 status | Surgery_type | Date_of_Surgery | Date_of_Last_Visit | Patient_Status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | TCGA-D8-A1XD | 36.0 | FEMALE | 0.080353 | 0.42638 | 0.54715 | 0.273680 | 3 | 1 | Positive | Positive | Negative | Modified Radical Mastectomy | 15-Jan-17 | 19-Jun-17 | Alive |
| 1 | TCGA-EW-A1OX | 43.0 | FEMALE | -0.420320 | 0.57807 | 0.61447 | -0.031505 | 2 | 3 | Positive | Positive | Negative | Lumpectomy | 26-Apr-17 | 09-Nov-18 | Dead |
| 2 | TCGA-A8-A079 | 69.0 | FEMALE | 0.213980 | 1.31140 | -0.32747 | -0.234260 | 3 | 1 | Positive | Positive | Negative | Other | 08-Sep-17 | 09-Jun-18 | Alive |
| 3 | TCGA-D8-A1XR | 56.0 | FEMALE | 0.345090 | -0.21147 | -0.19304 | 0.124270 | 2 | 1 | Positive | Positive | Negative | Modified Radical Mastectomy | 25-Jan-17 | 12-Jul-17 | Alive |
| 4 | TCGA-BH-A0BF | 56.0 | FEMALE | 0.221550 | 1.90680 | 0.52045 | -0.311990 | 2 | 1 | Positive | Positive | Negative | Other | 06-May-17 | 27-Jun-19 | Dead |
# Replace the remaining text columns with integer codes. A label that is
# absent from its mapping is turned into NaN by Series.map.
category_codes = {
    "ER status": {"Positive": 1},
    "PR status": {"Positive": 1},
    "HER2 status": {"Positive": 1, "Negative": 2},
    "Gender": {"MALE": 0, "FEMALE": 1},
    "Surgery_type": {
        "Other": 1,
        "Modified Radical Mastectomy": 2,
        "Lumpectomy": 3,
        "Simple Mastectomy": 4,
    },
}
for column_name, codes in category_codes.items():
    df[column_name] = df[column_name].map(codes)
print(df.head())
Patient_ID Age Gender Protein1 Protein2 Protein3 Protein4 \ 0 TCGA-D8-A1XD 36.0 1 0.080353 0.42638 0.54715 0.273680 1 TCGA-EW-A1OX 43.0 1 -0.420320 0.57807 0.61447 -0.031505 2 TCGA-A8-A079 69.0 1 0.213980 1.31140 -0.32747 -0.234260 3 TCGA-D8-A1XR 56.0 1 0.345090 -0.21147 -0.19304 0.124270 4 TCGA-BH-A0BF 56.0 1 0.221550 1.90680 0.52045 -0.311990 Tumour_Stage Histology ER status PR status HER2 status Surgery_type \ 0 3 1 1 1 2 2 1 2 3 1 1 2 3 2 3 1 1 1 2 1 3 2 1 1 1 2 2 4 2 1 1 1 2 1 Date_of_Surgery Date_of_Last_Visit Patient_Status 0 15-Jan-17 19-Jun-17 Alive 1 26-Apr-17 09-Nov-18 Dead 2 08-Sep-17 09-Jun-18 Alive 3 25-Jan-17 12-Jul-17 Alive 4 06-May-17 27-Jun-19 Dead
# Number of patients per Patient_Status label.
status = df["Patient_Status"].value_counts()
status
Patient_Status Alive 255 Dead 62 Name: count, dtype: int64
# Assemble the feature matrix and the target vector, hold out 10% of the rows
# for evaluation (fixed seed for a repeatable split), and fit a
# default-configured support-vector classifier on the rest.
feature_columns = [
    'Age', 'Gender', 'Protein1', 'Protein2', 'Protein3', 'Protein4',
    'Tumour_Stage', 'Histology', 'ER status', 'PR status',
    'HER2 status', 'Surgery_type',
]
x = df[feature_columns].to_numpy()
y = df['Patient_Status'].to_numpy()
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.10, random_state=42
)
model = SVC()
model.fit(x_train, y_train)
SVC()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC()
# Score the fitted model on the held-out test split.
model.score(x_test,y_test)
0.8125
# Prediction for one hand-written sample.
# Value order: Age, Gender, Protein1, Protein2, Protein3, Protein4,
# Tumour_Stage, Histology, ER status, PR status, HER2 status, Surgery_type.
# NOTE(review): ER status and PR status are given as 0 here — confirm that
# 0 is a valid code for those two columns.
sample = [74, 1, -0.080353, -0.420320, 0.61447, -0.273680, 3, 3, 0, 0, 2, 1]
features = np.array([sample])
print(model.predict(features))
['Alive']
import tkinter as tk
from tkinter import ttk
import numpy as np
from sklearn.svm import SVC
def predict():
    """Classify the patient described in the form and show the outcome.

    Reads the text of every widget in the module-level ``entries`` mapping,
    converts it to the numeric type required for that feature, passes the
    resulting single-row feature list to the module-level ``model`` and
    writes the predicted label into ``result_label``.

    If a field is empty or does not parse as the required number, no
    prediction is made: the offending field is reported in ``result_label``
    instead of letting ``ValueError`` escape from the button callback.
    """
    # (form label, converter) pairs. The sequence is the order in which the
    # values are handed to the model, so it must not be rearranged.
    field_converters = (
        ('Age', float),
        ('Gender', int),
        ('Protein1', float),
        ('Protein2', float),
        ('Protein3', float),
        ('Protein4', float),
        ('Tumour Stage', int),
        ('Histology', int),
        ('ER status', int),
        ('PR status', int),
        ('HER2 status', int),
        ('Surgery Type', int),
    )

    row = []
    for field, convert in field_converters:
        raw = entries[field].get()
        try:
            row.append(convert(raw))
        except ValueError:
            # Tell the user which box to fix rather than failing silently.
            result_label.config(text=f"Invalid input for {field}: {raw!r}")
            return

    features = [row]  # Convert to a 2D, one-row structure
    print("Input Features:", features)
    prediction = model.predict(features)
    print("Predicted Status:", prediction)
    result_label.config(text=f"Predicted Status: {prediction[0]}")
# Build the top-level window of the prediction form.
window = tk.Tk()
window.title("Breast Cancer Survival Prediction")

# One caption and one text box per model input, stacked one feature per row.
labels = [
    'Age', 'Gender', 'Protein1', 'Protein2', 'Protein3', 'Protein4',
    'Tumour Stage', 'Histology', 'ER status', 'PR status',
    'HER2 status', 'Surgery Type',
]
entries = {}
for row, field in enumerate(labels):
    caption = ttk.Label(window, text=field)
    caption.grid(row=row, column=0, padx=10, pady=5)
    box = ttk.Entry(window, textvariable=tk.StringVar(), width=20)
    entries[field] = box
    box.grid(row=row, column=1)

# Button that runs predict() on the current form contents.
predict_button = ttk.Button(window, text="Predict", command=predict)
predict_button.grid(row=len(labels) + 1, column=0, columnspan=2, pady=10)

# Initially empty label that predict() fills with its result.
result_label = ttk.Label(window, text="")
result_label.grid(row=len(labels) + 2, column=0, columnspan=2, pady=5)

# Hand control to the Tk event loop.
window.mainloop()
Input Features: [[43.0, 1, -0.42032, 0.57807, 0.61447, -0.031505, 2, 3, 1, 1, 2, 3]] Predicted Status: ['Alive'] Input Features: [[43.0, 1, -0.42032, 0.57807, 0.61447, -0.031505, 2, 3, 1, 1, 2, 3]] Predicted Status: ['Alive']